library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.1 ──
## ✓ ggplot2 3.3.5 ✓ purrr 0.3.4
## ✓ tibble 3.1.6 ✓ dplyr 1.0.7
## ✓ tidyr 1.1.4 ✓ stringr 1.4.0
## ✓ readr 2.1.0 ✓ forcats 0.5.1
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## x dplyr::filter() masks stats::filter()
## x dplyr::lag() masks stats::lag()
library(readxl)
library(rvest)
##
## Attaching package: 'rvest'
## The following object is masked from 'package:readr':
##
## guess_encoding
library(httr)
library(lubridate)
##
## Attaching package: 'lubridate'
## The following objects are masked from 'package:base':
##
## date, intersect, setdiff, union
library(flexdashboard)
library(plotly)
##
## Attaching package: 'plotly'
## The following object is masked from 'package:httr':
##
## config
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
Import dataset
# Load the subway crime records and pass them through janitor::clean_names()
# so the column names are normalised before any downstream step uses them.
raw_sub_crime <- janitor::clean_names(
  read_csv("./data/subwaycrime.csv")
)
## New names:
## * `` -> ...1
## Warning: One or more parsing issues, see `problems()` for details
## Rows: 6244 Columns: 37
## ── Column specification ────────────────────────────────────────────────────────
## Delimiter: ","
## chr (20): BORO_NM, CMPLNT_FR_DT, CMPLNT_TO_DT, CRM_ATPT_CPTD_CD, JURIS_DESC...
## dbl (11): ...1, CMPLNT_NUM, ADDR_PCT_CD, JURISDICTION_CODE, KY_CD, PD_CD, T...
## lgl (4): HADEVELOPT, HOUSING_PSA, LOC_OF_OCCUR_DESC, PARKS_NM
## time (2): CMPLNT_FR_TM, CMPLNT_TO_TM
##
## ℹ Use `spec()` to retrieve the full column specification for this data.
## ℹ Specify the column types or set `show_col_types = FALSE` to quiet this message.
# Load the subway station workbook (first sheet by default) and normalise its
# column names with janitor::clean_names().
raw_sub_station <- janitor::clean_names(
  read_xlsx("./data/subway_info_final.xlsx")
)
Crime events vs. month
# One row per incident, keyed by the calendar month ("YYYY-MM") of the
# complaint start date. Used for the month-level frequency plots.
sub_crime_freq <-
  raw_sub_crime %>%
  # Keep only the columns needed downstream, renaming on the way through.
  select(
    date = cmplnt_fr_dt,
    time = cmplnt_fr_tm,
    crime_event = ofns_desc,
    station_name,
    latitude,
    longitude
  ) %>%
  # Parse the m/d/Y string once and format it directly as "YYYY-MM".
  # (Previously the Date was re-parsed with a second, no-op as.Date() call
  # carrying a misleading two-digit-year format, then truncated via
  # substring(); the result is the same.)
  mutate(date = format(as.Date(date, format = "%m/%d/%Y"), "%Y-%m")) %>%
  # Months explicitly excluded from the monthly analysis.
  filter(!(date %in% c("1971-09", "2016-11", "2018-04", "2018-05")))
# Number of incidents per month, drawn as a plotly marker scatter.
plot_1 <-
  sub_crime_freq %>%
  count(date, name = "event_num") %>%
  plot_ly(x = ~date, y = ~event_num, type = "scatter", mode = "markers")

# Display the figure with a title and axis labels; plot_1 itself is not
# reassigned, so it stays unlabelled.
plot_1 %>%
  layout(
    title = "Crime events over time",
    xaxis = list(title = "Month"),
    yaxis = list(title = "Number of Crime Events")
  )
Top 5 most frequent crime events over time
# Offence types ranked by total number of incidents; keeps every offence whose
# rank is within the top five (ties share the lower rank via min_rank()).
most_freq_event <-
  sub_crime_freq %>%
  count(crime_event, name = "event_num") %>%
  mutate(rank = min_rank(desc(event_num))) %>%
  filter(rank <= 5)
# Monthly incident counts for five selected offence types, one line per type.
sub_crime_freq %>%
  filter(
    crime_event %in% c(
      "ASSAULT 3 & RELATED OFFENSES",
      "CRIMINAL MISCHIEF & RELATED OF",
      "DANGEROUS DRUGS",
      "GRAND LARCENY",
      "HARRASSMENT 2"
    )
  ) %>%
  group_by(crime_event, date) %>%
  # Drop the grouping explicitly so summarise() does not emit its
  # "grouped output" message and the plot receives an ungrouped tibble.
  summarize(event_num = n(), .groups = "drop") %>%
  # `date` is a character month ("YYYY-MM"), i.e. a discrete x. Without an
  # explicit group, ggplot2 places every month in its own group and
  # geom_line() draws nothing ("Each group consists of only one observation").
  # Grouping by offence type connects the months into one line per offence.
  ggplot(aes(x = date, y = event_num, color = crime_event, group = crime_event)) +
  geom_line() +
  labs(
    title = "Top 5 Most Frequent Events Over Time",
    x = "Month",
    y = "Crime Events Times"
  ) +
  theme(legend.text = element_text(size = 7))
## `summarise()` has grouped output by 'crime_event'. You can override using the `.groups` argument.
## geom_path: Each group consists of only one observation. Do you need to adjust
## the group aesthetic?

Crime events vs. time of day
# Incidents for a fixed set of offence types, each labelled with a four-hourly
# time point (00:00, 04:00, ..., 20:00) derived from the complaint start time.
sub_crime_time <-
  raw_sub_crime %>%
  select(
    start_date = cmplnt_fr_dt,
    start_time = cmplnt_fr_tm,
    crime_event = ofns_desc,
    station_name,
    latitude,
    longitude
  ) %>%
  mutate(
    # Each start time is assigned to a time point using two-hour cut-offs:
    # [00:00, 02:00) -> 00:00, [02:00, 06:00) -> 04:00, ...,
    # [14:00, 18:00) -> 16:00, and everything from 18:00 onwards -> 20:00.
    # The last bin has no upper bound so that an incident at exactly 23:59:59
    # is labelled 20:00 instead of falling through to NA.
    # Rows with a missing start_time match no condition and remain NA.
    event_time = as.character(case_when(
      hms("00:00:00") <= start_time & start_time < hms("02:00:00") ~ hms("00:00:00"),
      hms("02:00:00") <= start_time & start_time < hms("06:00:00") ~ hms("04:00:00"),
      hms("06:00:00") <= start_time & start_time < hms("10:00:00") ~ hms("08:00:00"),
      hms("10:00:00") <= start_time & start_time < hms("14:00:00") ~ hms("12:00:00"),
      hms("14:00:00") <= start_time & start_time < hms("18:00:00") ~ hms("16:00:00"),
      hms("18:00:00") <= start_time ~ hms("20:00:00")
    ))
  ) %>%
  # Restrict to the offence types shown in the time-of-day plot.
  filter(
    crime_event %in% c(
      "CRIMINAL MISCHIEF & RELATED OF",
      "ASSAULT 3 & RELATED OFFENSES",
      "HARRASSMENT 2",
      "GRAND LARCENY",
      "DANGEROUS DRUGS",
      "FELONY ASSAULT",
      "ROBBERY",
      "PETIT LARCENY",
      "FORGERY",
      "SEX CRIMES",
      "OFF. AGNST PUB ORD SENSBLTY &",
      "DANGEROUS WEAPONS",
      "THEFT OF SERVICES",
      "OFFENSES AGAINST PUBLIC ADMINI"
    )
  )
# Stacked bar chart: number of incidents at each time point, filled by offence
# type, with time points ordered from most to least frequent.
plot_2 <-
  sub_crime_time %>%
  # Order the time-point levels by frequency here rather than inside aes(),
  # so the mapped variable keeps its plain name (`event_time`).
  mutate(event_time = fct_infreq(as.factor(event_time))) %>%
  ggplot(aes(x = event_time, fill = crime_event)) +
  # geom_bar() counts rows per x value by default. The previous
  # geom_histogram(stat = "count", height = 2) produced the same bars but
  # warned about ignored parameters (binwidth, bins, pad, height).
  geom_bar(width = 0.9) +
  labs(
    title = "Frequency of crime events v.s. Time points",
    x = "Occurrence time",
    y = "Frequency of crime events"
  ) +
  theme_bw() +
  theme(
    plot.title = element_text(hjust = 1),
    legend.position = "bottom",
    legend.text = element_text(size = 8)
  ) +
  # The legend belongs to the `fill` aesthetic; a guide set on `col` had no
  # scale to attach to, so the two-row layout was never applied.
  guides(fill = guide_legend(nrow = 2))
## Warning: Ignoring unknown parameters: binwidth, bins, pad, height
# Convert plot_2 to an interactive plotly figure and place a horizontal legend
# below the plotting area.
layout(
  ggplotly(plot_2),
  legend = list(
    orientation = "h",
    xanchor = "center",
    yanchor = "top",
    x = 0.3,
    y = -0.3
  )
)
Response time
# Per-incident elapsed time between the complaint "from" and "to" timestamps,
# in hours, together with the four-hourly time point of the start time.
crime_response_time <-
  raw_sub_crime %>%
  select(
    start_date = cmplnt_fr_dt,
    start_time = cmplnt_fr_tm,
    end_date = cmplnt_to_dt,
    end_time = cmplnt_to_tm,
    crime_event = ofns_desc,
    law_cat_cd
  ) %>%
  # Both clock times are required to build the timestamps below.
  drop_na(start_time, end_time) %>%
  mutate(
    # Re-express the m/d/Y date strings as ISO "YYYY-MM-DD" text.
    start_date = as.character(as.Date(start_date, format = "%m/%d/%Y")),
    end_date = as.character(as.Date(end_date, format = "%m/%d/%Y")),
    # Combine date and clock time into full timestamps (session time zone).
    start = as.POSIXct(paste(start_date, start_time), format = "%Y-%m-%d %H:%M:%S"),
    end = as.POSIXct(paste(end_date, end_time), format = "%Y-%m-%d %H:%M:%S"),
    response_time = difftime(end, start, units = "hours"),
    # Each start time is assigned to a time point using two-hour cut-offs:
    # [00:00, 02:00) -> 00:00, [02:00, 06:00) -> 04:00, ...,
    # [14:00, 18:00) -> 16:00, and everything from 18:00 onwards -> 20:00.
    # The last bin has no upper bound so that an incident at exactly 23:59:59
    # is labelled 20:00 instead of falling through to NA.
    event_time = as.character(case_when(
      hms("00:00:00") <= start_time & start_time < hms("02:00:00") ~ hms("00:00:00"),
      hms("02:00:00") <= start_time & start_time < hms("06:00:00") ~ hms("04:00:00"),
      hms("06:00:00") <= start_time & start_time < hms("10:00:00") ~ hms("08:00:00"),
      hms("10:00:00") <= start_time & start_time < hms("14:00:00") ~ hms("12:00:00"),
      hms("14:00:00") <= start_time & start_time < hms("18:00:00") ~ hms("16:00:00"),
      hms("18:00:00") <= start_time ~ hms("20:00:00")
    ))
  )
# Show the incidents with the longest elapsed time first.
arrange(crime_response_time, desc(response_time))
## # A tibble: 5,802 × 10
## start_date start_time end_date end_time crime_event law_cat_cd
## <chr> <time> <chr> <time> <chr> <chr>
## 1 1971-09-09 19:00 2021-09-09 19:03 CRIMINAL MISCHIEF & REL… MISDEMEAN…
## 2 2019-09-20 00:01 2021-06-03 00:30 PETIT LARCENY MISDEMEAN…
## 3 2020-03-18 00:01 2021-06-09 02:45 PETIT LARCENY MISDEMEAN…
## 4 2020-02-05 07:20 2021-02-05 07:29 CRIMINAL MISCHIEF & REL… MISDEMEAN…
## 5 2020-01-07 13:30 2021-01-07 13:00 CRIMINAL MISCHIEF & REL… MISDEMEAN…
## 6 2021-05-28 04:38 2021-09-29 05:25 THEFT OF SERVICES MISDEMEAN…
## 7 2021-02-02 10:00 2021-06-02 10:07 ASSAULT 3 & RELATED OFF… MISDEMEAN…
## 8 2020-04-25 09:55 2020-07-05 05:23 CRIMINAL MISCHIEF & REL… FELONY
## 9 2020-12-18 00:00 2021-02-26 12:00 FRAUDS MISDEMEAN…
## 10 2020-11-08 05:32 2020-12-30 08:47 CRIMINAL MISCHIEF & REL… FELONY
## # … with 5,792 more rows, and 4 more variables: start <dttm>, end <dttm>,
## # response_time <drtn>, event_time <chr>